Draft

The Condorcet Jury Theorem and Democratic Rationality

Sensitivity Analysis of Failure Conditions

theory
simulation
causal inference
sensitivity analysis
import pandas as pd
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
import arviz as az
import seaborn as sns
# Set the global NumPy random seed so every stochastic step below
# (truth simulation, juror competencies, vote draws) is reproducible.
np.random.seed(42)

# ============================================================================
# SIMULATE DATA
# ============================================================================
# Simulate some jury voting data
# ----------------------------------------------------------------------------
# SIMULATE DATA
# ----------------------------------------------------------------------------
# Synthetic jury-voting data: a fixed panel of jurors votes on a set of cases.
n_cases = 50   # number of cases
n_jurors = 15  # number of jurors

# Latent truth per case (1 = guilty, 0 = not guilty), a fair coin flip.
true_states = np.random.binomial(1, 0.5, n_cases)

# Population-level competence parameters used to generate the jurors.
true_p = 0.7  # average competence
true_discrimination = 0.15  # spread of competencies (sd on the logit scale)

# Draw heterogeneous per-juror competencies around logit(true_p).
mu_logit = np.log(true_p / (1 - true_p))
logit_p_jurors = np.random.normal(mu_logit, true_discrimination, n_jurors)
p_jurors = 1 / (1 + np.exp(-logit_p_jurors))

# Generate one vote per (case, juror): a juror votes "guilty" with
# probability p when the case is truly guilty, and with probability 1 - p
# otherwise.  Draw order matches a plain nested loop, keeping the seeded
# stream identical to the original cell.
votes = np.zeros((n_cases, n_jurors))
for case_idx in range(n_cases):
    for juror_idx in range(n_jurors):
        if true_states[case_idx] == 1:
            prob = p_jurors[juror_idx]
        else:
            prob = 1 - p_jurors[juror_idx]
        votes[case_idx, juror_idx] = np.random.binomial(1, prob)

print(f"Data simulated: {n_cases} cases, {n_jurors} jurors")
print(f"True average competence: {p_jurors.mean():.3f}")
print(f"Majority vote accuracy: {(votes.mean(axis=1) > 0.5).mean():.3f}")
Data simulated: 50 cases, 15 jurors
True average competence: 0.696
Majority vote accuracy: 0.400
# Define different prior specifications
prior_specs = {
    'weakly_informative': {'alpha': 3, 'beta': 2, 'desc': 'Weakly informative (centered at 0.6)'},
    'strong_competence': {'alpha': 10, 'beta': 5, 'desc': 'Strong prior (p ~ 0.67)'},
    'barely_competent': {'alpha': 6, 'beta': 5, 'desc': 'Skeptical prior (p ~ 0.55)'},
    'incompetent': {'alpha': 5, 'beta': 10, 'desc': 'Incompetent prior (p ~ 0.33)'},
}
def fit_condorcet_base_model(votes, spec):
    """Fit the basic Condorcet model under a given Beta prior on competence.

    Parameters
    ----------
    votes : array-like, shape (n_cases, n_jurors)
        Observed 0/1 vote matrix.
        NOTE(review): the latent-state shape comes from the module-level
        ``n_cases`` global — assumes it equals ``votes.shape[0]``; confirm.
    spec : dict
        Prior specification with keys 'alpha' and 'beta' for the shared
        Beta prior on juror competence ``p``.

    Returns
    -------
    (idata, model)
        InferenceData with prior, posterior and posterior-predictive
        groups, plus the PyMC model object.
    """
    with pm.Model() as model:
        # SENSITIVITY PARAMETER: Prior on competence.
        # This is our first example of treating assumptions as parameters.
        p = pm.Beta('p', alpha=spec['alpha'], beta=spec['beta'])
        
        # True state of each case (latent), with a flat 50/50 prior.
        true_state = pm.Bernoulli('true_state', p=0.5, shape=n_cases)
        
        # Likelihood: a juror agrees with the truth with probability p,
        # so the vote probability flips to 1 - p when the truth is 0.
        vote_prob = pm.math.switch(
            pm.math.eq(true_state[:, None], 1),
            p,
            1 - p
        )
        
        likelihood = pm.Bernoulli('votes', p=vote_prob, observed=votes)
        
        # Posterior predictive: majority vote accuracy for different jury sizes.
        # The simulated case is assumed truly guilty (truth = 1), so a vote of
        # 1 is "correct" and a strict majority of 1s is a correct verdict.
        jury_sizes_eval = [3, 7, 15]
        for size in jury_sizes_eval:
            # Simulate votes for a new case (truth = 1)
            votes_sim = pm.Bernoulli(f'sim_votes_{size}', p=p, shape=size)
            majority_correct = pm.Deterministic(
                f'majority_correct_{size}',
                pm.math.sum(votes_sim) > size / 2
            )
        
        # Sample prior predictive, posterior, and posterior predictive,
        # accumulating all three groups into one InferenceData object.
        idata = pm.sample_prior_predictive()
        idata.extend(pm.sample(2000, tune=1000, random_seed=42,
                                       target_accept=0.95, return_inferencedata=True))
        idata.extend(pm.sample_posterior_predictive(idata))

    return idata, model

# Fit the base model once per prior specification, keeping both the
# InferenceData (under the prior's name) and the model object
# (under '<name>_model') in a single dict.
traces = {}

for prior_name, spec in prior_specs.items():
    print(f"\nFitting with {spec['desc']}...")
    idata, model = fit_condorcet_base_model(votes, spec)
    traces[prior_name] = idata
    traces[prior_name + '_model'] = model

Fitting with Weakly informative (centered at 0.6)...
Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]

Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]

Fitting with Strong prior (p ~ 0.67)...
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]

Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]

Fitting with Skeptical prior (p ~ 0.55)...
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]

Sampling: [p, sim_votes_15, sim_votes_3, sim_votes_7, true_state, votes]

Fitting with Incompetent prior (p ~ 0.33)...
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>NUTS: [p]
>BinaryGibbsMetropolis: [true_state, sim_votes_3, sim_votes_7, sim_votes_15]

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 6 seconds.
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Sampling: [votes]

# Tabulate prior-predictive majority accuracy for each prior spec:
# one column per prior, one row per evaluated jury size.
print("\n" + "="*70)
print("PRIOR SENSITIVITY RESULTS")
print("="*70)
ests = {}
for prior_name in prior_specs:
    for i in [3, 7, 15]:
        p = traces[prior_name]['prior'][f'majority_correct_{i}'].mean().item()
        ests.setdefault(prior_name, []).append(p)

prior_estimates = pd.DataFrame(
    ests,
    index=['Correct % for Majority of 3',
           'Correct % for Majority of 7',
           'Correct % for Majority of 15'],
)
prior_estimates

======================================================================
PRIOR SENSITIVITY RESULTS
======================================================================
weakly_informative strong_competence barely_competent incompetent
Correct % for Majority of 3 0.652 0.726 0.540 0.282
Correct % for Majority of 7 0.672 0.806 0.578 0.200
Correct % for Majority of 15 0.708 0.824 0.610 0.144
# Tabulate posterior majority accuracy for each prior spec.
print("\n" + "="*70)
# Bug fix: this cell summarises the POSTERIOR but printed the same
# "PRIOR SENSITIVITY RESULTS" banner as the cell above.
print("POSTERIOR SENSITIVITY RESULTS")
print("="*70)
ests = {}
for prior_name in prior_specs:
    for i in [3, 7, 15]:
        p = traces[prior_name]['posterior'][f'majority_correct_{i}'].mean().item()
        ests.setdefault(prior_name, []).append(p)

posterior_estimates = pd.DataFrame(
    ests,
    index=['Correct % for Majority of 3',
           'Correct % for Majority of 7',
           'Correct % for Majority of 15'],
)
posterior_estimates

======================================================================
PRIOR SENSITIVITY RESULTS
======================================================================
weakly_informative strong_competence barely_competent incompetent
Correct % for Majority of 3 0.776750 0.777750 0.776125 0.22150
Correct % for Majority of 7 0.868375 0.866750 0.868000 0.13225
Correct % for Majority of 15 0.949000 0.948875 0.945375 0.05000
# Side-by-side line plots of prior vs posterior majority-accuracy
# estimates, one line per prior specification.
# (Removed unused local `jitters`, which was defined but never applied.)
fig, axs = plt.subplots(1, 2, figsize=(20, 5))
axs = axs.flatten()

for prior_name in prior_specs.keys():
    axs[0].plot(prior_estimates.index, prior_estimates[prior_name], label=prior_name + ' prior', marker='o')
    axs[1].plot(posterior_estimates.index, posterior_estimates[prior_name], label=prior_name + ' prior', marker='o')
axs[0].legend()
axs[1].legend()
axs[0].set_title("Prior Estimates for Majority Accuracy");
axs[1].set_title("Posterior Estimates for Majority Accuracy");

# Define different priors on the discrimination parameter.
# Each 'sigma' is the scale of a HalfNormal prior on the between-juror
# spread of logit-competence (sigma_logit_p in the model below).
# NOTE(review): the 'desc' labels (σ ~ 0.25 / 0.5 / 2) do not match the
# 'sigma' values (0.5 / 1.0 / 2.0) — possibly describing an implied
# prior mean rather than the scale; confirm and align.
discrimination_priors = {
    'weak_discrimination': {'sigma': 0.5, 'desc': 'Weak discrimination prior (σ ~ 0.25)'},
    'moderate_discrimination': {'sigma': 1.0, 'desc': 'Moderate discrimination prior (σ ~ 0.5)'},
    'strong_discrimination': {'sigma': 2.0, 'desc': 'Strong discrimination prior (σ ~ 2)'},
}


def fit_discrimination_binomial_model(votes, n_jurors, priors):
    """Fit a hierarchical (collapsed binomial) model of juror competence.

    Each juror's count of agreements with the majority verdict is modelled
    as Binomial(n_cases, p_juror), with per-juror competencies drawn from
    a population distribution on the logit scale.

    Parameters
    ----------
    votes : ndarray, shape (n_cases, n_jurors)
        Observed 0/1 vote matrix.
    n_jurors : int
        Number of jurors (columns of ``votes``).
    priors : dict
        Must contain key 'sigma': the scale of the HalfNormal prior on the
        between-juror spread — the key sensitivity parameter.

    Returns
    -------
    (idata, sensitivity_model)

    NOTE(review): relies on the module-level ``n_cases`` global for the
    binomial trial count — assumes it equals ``votes.shape[0]``.
    """
    # Collapse the vote matrix: how often each juror agrees with the
    # majority verdict across cases.
    majority_votes = (votes.mean(axis=1) > 0.5).astype(int)
    agreements_per_juror = np.array(
        [(votes[:, j] == majority_votes).sum() for j in range(n_jurors)]
    )

    with pm.Model() as sensitivity_model:
        # Hyperprior: population mean of logit-competence.
        mu_logit_p = pm.Normal('mu_logit_p', mu=0.6, sigma=0.5)

        # KEY SENSITIVITY PARAMETER: individual discrimination.
        # Bug fix: previously read spec['sigma'] from a variable leaked by
        # the calling loop; now uses the `priors` argument as intended.
        sigma_logit_p = pm.HalfNormal('sigma_logit_p', sigma=priors['sigma'])

        # NON-CENTERED PARAMETERIZATION for better sampling:
        # logit_p_juror = mu + sigma * z, where z ~ Normal(0, 1).
        z_juror = pm.Normal('z_juror', mu=0, sigma=1, shape=n_jurors)
        logit_p_juror = pm.Deterministic('logit_p_juror',
                                         mu_logit_p + sigma_logit_p * z_juror)
        p_juror = pm.Deterministic('p_juror', pm.math.invlogit(logit_p_juror))

        # Population mean competence on the probability scale.
        mean_p = pm.Deterministic('mean_p', pm.math.invlogit(mu_logit_p))

        # Likelihood: agreement counts are Binomial(n_cases, p_juror).
        pm.Binomial('agreements',
                    n=n_cases,
                    p=p_juror,
                    observed=agreements_per_juror)

        # Prior, posterior and posterior-predictive sampling.
        idata = pm.sample_prior_predictive()
        idata.extend(pm.sample(
            1000,
            tune=2000,
            random_seed=42,
            target_accept=0.95,
            return_inferencedata=True,
            idata_kwargs={"log_likelihood": True},
        ))
        idata.extend(pm.sample_posterior_predictive(idata))

    # Bug fix: previously returned the module-level `model` leaked from an
    # earlier cell; return the model actually built in this function.
    return idata, sensitivity_model


# Fit the hierarchical model once per discrimination prior, keeping the
# InferenceData under the prior's name and the model under '<name>_model'.
traces_discrimination = {}

for prior_name, spec in discrimination_priors.items():
    print(f"\nFitting with {spec['desc']}...")
    idata, model = fit_discrimination_binomial_model(votes, n_jurors, spec)
    traces_discrimination[prior_name] = idata
    traces_discrimination[prior_name + '_model'] = model
Sampling: [agreements, mu_logit_p, sigma_logit_p, z_juror]
Initializing NUTS using jitter+adapt_diag...

Fitting with Weak discrimination prior (σ ~ 0.25)...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_logit_p, sigma_logit_p, z_juror]

Sampling 4 chains for 2_000 tune and 1_000 draw iterations (8_000 + 4_000 draws total) took 2 seconds.
Sampling: [agreements]

Sampling: [agreements, mu_logit_p, sigma_logit_p, z_juror]
Initializing NUTS using jitter+adapt_diag...

Fitting with Moderate discrimination prior (σ ~ 0.5)...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_logit_p, sigma_logit_p, z_juror]

Sampling 4 chains for 2_000 tune and 1_000 draw iterations (8_000 + 4_000 draws total) took 2 seconds.
Sampling: [agreements]

Sampling: [agreements, mu_logit_p, sigma_logit_p, z_juror]
Initializing NUTS using jitter+adapt_diag...

Fitting with Strong discrimination prior (σ ~ 2)...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_logit_p, sigma_logit_p, z_juror]

Sampling 4 chains for 2_000 tune and 1_000 draw iterations (8_000 + 4_000 draws total) took 2 seconds.
Sampling: [agreements]

import numpy as np

# ============================================================
# CONSOLIDATED POSTERIOR PREDICTIVE WORKFLOW
# ============================================================

# Required inputs:
#   idata        : InferenceData from collapsed (binomial) model
#   n_cases      : number of cases
#   n_jurors     : number of jurors
#   jury_sizes   : list of jury sizes to evaluate (e.g. [3, 5, 7, 10, 15])

# Jury sizes evaluated in the posterior predictive workflow below.
jury_sizes = [3, 5, 7, 10, 15]

# ------------------------------------------------------------
# 1. Generative expansion: truth -> votes
# ------------------------------------------------------------

def simulate_votes(p_juror, n_cases, truth=None):
    """Simulate a vote matrix for a panel of jurors.

    Parameters
    ----------
    p_juror : array-like, shape (n_jurors,)
        Per-juror probability of voting with the truth.
    n_cases : int
        Number of cases to simulate.
    truth : array-like of {0, 1}, shape (n_cases,), optional
        True state per case; drawn as fair coin flips when omitted.

    Returns
    -------
    truth : ndarray, shape (n_cases,)
    votes : ndarray of {0, 1}, shape (n_cases, n_jurors)
    """
    # Bug fix: derive the panel size from the input instead of the
    # module-level `n_jurors` global, so any panel size works.
    panel_size = len(p_juror)
    if truth is None:
        truth = np.random.binomial(1, 0.5, size=n_cases)
    votes = np.zeros((n_cases, panel_size), dtype=int)

    for i in range(n_cases):
        for j in range(panel_size):
            # A juror votes "1" with probability p when the case is truly
            # 1, and with probability 1 - p otherwise.
            prob = p_juror[j] if truth[i] == 1 else 1 - p_juror[j]
            votes[i, j] = np.random.binomial(1, prob)

    return truth, votes


# ------------------------------------------------------------
# 3. Diagnostic functions
# ------------------------------------------------------------

def majority_accuracy(votes, truth):
    """Fraction of cases where the simple-majority verdict equals the truth."""
    verdicts = np.mean(votes, axis=1) > 0.5
    return np.mean(verdicts == truth)


def unanimity_rate(votes):
    """Fraction of cases where the panel is unanimous (all 0s or all 1s)."""
    totals = votes.sum(axis=1)
    panel_size = votes.shape[1]
    unanimous = (totals == 0) | (totals == panel_size)
    return np.mean(unanimous)


def juror_agreement_rates(votes, truth):
    """Per-juror fraction of cases where the vote matches the true state."""
    matches = votes == truth[:, None]
    return matches.mean(axis=0)


def error_correlation(votes, truth):
    """Pairwise correlation matrix of juror error indicators.

    NOTE: a juror whose error indicator is constant across cases has zero
    variance, so np.corrcoef produces NaNs in that juror's row/column
    (downstream summaries use nan-aware reductions).
    """
    error_indicator = (votes != truth[:, None]).T
    return np.corrcoef(error_indicator)


def majority_accuracy_for_size(votes, truth, jury_size):
    """Majority accuracy when a random sub-jury of `jury_size` decides each case.

    For each case a sub-panel is sampled without replacement and its
    majority verdict is compared to the truth; returns the hit rate.
    """
    total_cases, total_jurors = votes.shape
    hits = np.zeros(total_cases, dtype=int)

    for case_idx in range(total_cases):
        panel = np.random.choice(total_jurors, size=jury_size, replace=False)
        verdict = votes[case_idx, panel].mean() > 0.5
        hits[case_idx] = (verdict == truth[case_idx])

    return hits.mean()


# ------------------------------------------------------------
# 4. Run posterior predictive simulations
# ------------------------------------------------------------

def run_post_fit_ppc(p_juror_samples, n_cases, truth=None, jury_sizes=None):
    """Run predictive simulations for every posterior (or prior) draw.

    Parameters
    ----------
    p_juror_samples : ndarray, shape (n_jurors, n_samples)
        Per-juror competence draws, one column per sample.
    n_cases : int
        Number of cases to simulate per draw.
    truth : array-like, optional
        Fixed truth vector passed through to ``simulate_votes``.
    jury_sizes : list of int, optional
        Sub-jury sizes to evaluate; defaults to [3, 5, 7, 10, 15].
        (Bug fix: previously a mutable default argument.)

    Returns
    -------
    dict of stacked diagnostics per sample.
        NOTE(review): the '_15' key suffixes are labels referring to the
        full panel — they assume a 15-juror panel; confirm for other sizes.
    """
    if jury_sizes is None:
        jury_sizes = [3, 5, 7, 10, 15]

    n_jurors, n_samples = p_juror_samples.shape
    ppc_results = {
        "majority_accuracy_15": np.zeros(n_samples),
        "unanimity_rate_15": np.zeros(n_samples),
        "agreement_rates": np.zeros((n_samples, n_jurors)),
        "error_corr": np.zeros((n_samples, n_jurors, n_jurors)),
        "majority_accuracy_by_size": {
            k: np.zeros(n_samples) for k in jury_sizes
        },
    }
    for s in range(n_samples):
        truth_s, votes_s = simulate_votes(
            p_juror_samples[:, s],
            n_cases,
            truth,
        )

        # Full-panel diagnostics for this draw.
        ppc_results["majority_accuracy_15"][s] = majority_accuracy(votes_s, truth_s)
        ppc_results["unanimity_rate_15"][s] = unanimity_rate(votes_s)
        ppc_results["agreement_rates"][s] = juror_agreement_rates(votes_s, truth_s)
        ppc_results["error_corr"][s] = error_correlation(votes_s, truth_s)

        # Random sub-jury diagnostics.
        for k in jury_sizes:
            ppc_results["majority_accuracy_by_size"][k][s] = (
                majority_accuracy_for_size(votes_s, truth_s, k)
            )

    return ppc_results

def summarise_error_corr(ppc_results):
    """Summary statistics of off-diagonal juror error correlations.

    Pools the upper-triangle entries of every sample's correlation matrix
    and reports their mean, sd, and 95th percentile of absolute value.
    """
    corr = ppc_results["error_corr"]  # (samples, jurors, jurors)
    n_jurors_dim = corr.shape[1]

    # Gather the strict upper triangle of every sample in one indexed read.
    rows, cols = np.triu_indices(n_jurors_dim, k=1)
    off_diag = corr[:, rows, cols].ravel()

    return {
        "mean_off_diag": off_diag.mean(),
        "sd_off_diag": off_diag.std(),
        "p95_abs_corr": np.percentile(np.abs(off_diag), 95),
    }


# ------------------------------------------------------------
# 5. Summaries (example outputs)
# ------------------------------------------------------------

def summarise_post_fit_ppc(ppc_results):
    """Print PPC summaries and return a percentile table of majority accuracy.

    Parameters
    ----------
    ppc_results : dict
        Output of ``run_post_fit_ppc``.

    Returns
    -------
    pd.DataFrame
        One column per jury size ('majority_accuracy_<k>') and rows
        'percentile_5', 'percentile_50', 'percentile_95'.
    """
    print("\n=== Majority accuracy (full jury) ===")
    a = np.percentile(
        ppc_results["majority_accuracy_15"], [5, 50, 95]
    )
    print(a)

    print("\n=== Unanimity rate (full jury) ===")
    b = np.percentile(
        ppc_results["unanimity_rate_15"], [5, 50, 95]
    )
    print(b)

    # Bug fix: derive the jury sizes from the results themselves instead
    # of depending on the module-level `jury_sizes` global.
    sizes = list(ppc_results["majority_accuracy_by_size"].keys())
    percentiles = [5, 50, 95]
    summaries = []
    print("\n=== Majority accuracy by jury size ===")
    for k in sizes:
        print(f"Jury size {k}:")
        c = np.percentile(
            ppc_results["majority_accuracy_by_size"][k],
            percentiles,
        )
        print(c)
        summaries.append(c)

    print("\n=== Mean juror agreement rates ===")
    d = ppc_results["agreement_rates"].mean(axis=0)
    print(d)

    # Previously `d` was appended to `summaries` only to be dropped again
    # with a [:-1] slice; build the table from the percentile rows alone.
    summaries_df = pd.DataFrame(summaries).T
    summaries_df.columns = [f'majority_accuracy_{k}' for k in sizes]
    summaries_df.index = [f'percentile_{p}' for p in percentiles]
    return summaries_df


def compare_prior_posterior(idata):
    """Run prior- and posterior-predictive checks and tabulate both.

    Parameters
    ----------
    idata : InferenceData with 'p_juror' in both prior and posterior groups.

    Returns
    -------
    (summary, ppc_result_posterior)
        A DataFrame with a 'prior'/'posterior' outer index of percentile
        tables, plus the raw posterior PPC results.

    NOTE(review): relies on the module-level ``n_cases`` and
    ``true_states`` globals — confirm they match the fitted data.
    """
    # Flatten (chain, draw) into one sample axis: shape (n_jurors, n_samples).
    p_juror_samples_prior = idata.prior["p_juror"].stack(sample=("chain", "draw")).values
    p_juror_samples_posterior = idata.posterior["p_juror"].stack(sample=("chain", "draw")).values
    # (Removed unused unpacking of n_jurors/n_samples.)

    ppc_result_prior = run_post_fit_ppc(p_juror_samples_prior, n_cases, true_states)
    ppc_result_posterior = run_post_fit_ppc(p_juror_samples_posterior, n_cases, true_states)
    summaries_prior = summarise_post_fit_ppc(ppc_result_prior)
    summaries_posterior = summarise_post_fit_ppc(ppc_result_posterior)

    summary = pd.concat({'prior': summaries_prior, 'posterior': summaries_posterior})
    return summary, ppc_result_posterior
Code
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def plot_differences(df):
    """Plot prior vs posterior majority-accuracy percentile bands.

    Parameters
    ----------
    df : pd.DataFrame
        MultiIndexed by ('prior'|'posterior', 'percentile_<p>') with
        'majority_accuracy_<k>' columns, as returned by
        compare_prior_posterior.
    """
    # Derive the jury sizes from the column names instead of hard-coding
    # [3, 5, 7, 10, 15], so the plot stays correct if the sizes change.
    x_values = [int(col.rsplit('_', 1)[1]) for col in df.columns]

    # Extract the percentile rows for each group.
    prior_median = df.loc[('prior', 'percentile_50')]
    prior_low = df.loc[('prior', 'percentile_5')]
    prior_high = df.loc[('prior', 'percentile_95')]

    post_median = df.loc[('posterior', 'percentile_50')]
    post_low = df.loc[('posterior', 'percentile_5')]
    post_high = df.loc[('posterior', 'percentile_95')]

    plt.figure(figsize=(10, 6))

    # Prior: median line with a 5th-95th percentile band.
    plt.plot(x_values, prior_median, label='Prior Median', color='blue', marker='o')
    plt.fill_between(x_values, prior_low, prior_high, color='blue', alpha=0.2, label='Prior (5th-95th)')

    # Posterior: median line with a 5th-95th percentile band.
    plt.plot(x_values, post_median, label='Posterior Median', color='red', marker='o')
    plt.fill_between(x_values, post_low, post_high, color='red', alpha=0.2, label='Posterior (5th-95th)')

    # Formatting
    plt.title('Majority Accuracy: Prior vs Posterior Distributions')
    plt.xlabel('Number of Jurors (n) in Majority Calculation')
    plt.ylabel('Majority Accuracy Score')
    plt.xticks(x_values)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)

    plt.show()

def plot_error_corr_heatmap(ppc_results, title="Error correlation"):
    """Heatmap of the juror-by-juror error correlation matrix, averaged
    over predictive samples."""
    avg_corr = ppc_results["error_corr"].mean(axis=0)

    plt.figure(figsize=(6, 5))
    sns.heatmap(
        avg_corr,
        vmin=-0.3,
        vmax=0.3,
        cmap="coolwarm",
        square=True,
        cbar_kws={"label": "Error correlation"},
    )
    plt.title(title)
    plt.tight_layout()
    plt.show()
# Prior/posterior PPC comparison under the weak-discrimination prior.
summaries_weak_discrimination,  ppc_result_posterior_weak_discrimination = compare_prior_posterior(traces_discrimination['weak_discrimination'])

summaries_weak_discrimination
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3065: RuntimeWarning: invalid value encountered in divide
  c /= stddev[:, None]
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3066: RuntimeWarning: invalid value encountered in divide
  c /= stddev[None, :]

=== Majority accuracy (full jury) ===
[0.36 0.86 1.  ]

=== Unanimity rate (full jury) ===
[0.   0.   0.04]

=== Majority accuracy by jury size ===
Jury size 3:
[0.4   0.7   0.921]
Jury size 5:
[0.4  0.74 0.96]
Jury size 7:
[0.38 0.76 0.98]
Jury size 10:
[0.42 0.82 1.  ]
Jury size 15:
[0.36 0.86 1.  ]

=== Mean juror agreement rates ===
[0.61964 0.64204 0.6262  0.63484 0.63676 0.64796 0.63656 0.6322  0.63752
 0.641   0.62172 0.62816 0.62896 0.63316 0.639  ]

=== Majority accuracy (full jury) ===
[0.9  0.96 1.  ]

=== Unanimity rate (full jury) ===
[0.   0.   0.02]

=== Majority accuracy by jury size ===
Jury size 3:
[0.68 0.8  0.88]
Jury size 5:
[0.74 0.84 0.92]
Jury size 7:
[0.8  0.88 0.96]
Jury size 10:
[0.84 0.92 0.98]
Jury size 15:
[0.9  0.96 1.  ]

=== Mean juror agreement rates ===
[0.694615 0.70128  0.706215 0.700695 0.700355 0.70774  0.693205 0.70913
 0.703775 0.708515 0.71373  0.71302  0.70089  0.705545 0.703755]
majority_accuracy_3 majority_accuracy_5 majority_accuracy_7 majority_accuracy_10 majority_accuracy_15
prior percentile_5 0.400 0.40 0.38 0.42 0.36
percentile_50 0.700 0.74 0.76 0.82 0.86
percentile_95 0.921 0.96 0.98 1.00 1.00
posterior percentile_5 0.680 0.74 0.80 0.84 0.90
percentile_50 0.800 0.84 0.88 0.92 0.96
percentile_95 0.880 0.92 0.96 0.98 1.00
# Error-correlation diagnostics for the weak-discrimination posterior.
plot_error_corr_heatmap(ppc_result_posterior_weak_discrimination)

summarise_error_corr(ppc_result_posterior_weak_discrimination)

{'mean_off_diag': np.float64(-0.0002439357891321022),
 'sd_off_diag': np.float64(0.14286033462137696),
 'p95_abs_corr': np.float64(0.27759801501994014)}
# Prior-vs-posterior bands (weak prior), then run the same PPC comparison
# under the moderate-discrimination prior.
plot_differences(summaries_weak_discrimination)

summaries_moderate_discrimination, ppc_result_posterior_moderate_discrimination = compare_prior_posterior(traces_discrimination['moderate_discrimination'])

summaries_moderate_discrimination
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3065: RuntimeWarning: invalid value encountered in divide
  c /= stddev[:, None]
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3066: RuntimeWarning: invalid value encountered in divide
  c /= stddev[None, :]

=== Majority accuracy (full jury) ===
[0.32 0.86 1.  ]

=== Unanimity rate (full jury) ===
[0.   0.   0.04]

=== Majority accuracy by jury size ===
Jury size 3:
[0.4 0.7 0.9]
Jury size 5:
[0.42 0.74 0.96]
Jury size 7:
[0.38 0.76 0.98]
Jury size 10:
[0.38 0.81 1.  ]
Jury size 15:
[0.32 0.86 1.  ]

=== Mean juror agreement rates ===
[0.62964 0.6278  0.63744 0.63632 0.617   0.6254  0.61976 0.63256 0.63128
 0.62652 0.63736 0.63448 0.62104 0.6332  0.63088]

=== Majority accuracy (full jury) ===
[0.9  0.96 1.  ]

=== Unanimity rate (full jury) ===
[0.   0.   0.02]

=== Majority accuracy by jury size ===
Jury size 3:
[0.68 0.8  0.88]
Jury size 5:
[0.74 0.84 0.92]
Jury size 7:
[0.78 0.88 0.96]
Jury size 10:
[0.84 0.92 0.98]
Jury size 15:
[0.9  0.96 1.  ]

=== Mean juror agreement rates ===
[0.69193  0.700095 0.704435 0.700705 0.700585 0.707545 0.69066  0.711665
 0.70335  0.708425 0.70878  0.71398  0.69932  0.70703  0.70433 ]
majority_accuracy_3 majority_accuracy_5 majority_accuracy_7 majority_accuracy_10 majority_accuracy_15
prior percentile_5 0.40 0.42 0.38 0.38 0.32
percentile_50 0.70 0.74 0.76 0.81 0.86
percentile_95 0.90 0.96 0.98 1.00 1.00
posterior percentile_5 0.68 0.74 0.78 0.84 0.90
percentile_50 0.80 0.84 0.88 0.92 0.96
percentile_95 0.88 0.92 0.96 0.98 1.00
# Error-correlation diagnostics for the moderate-discrimination posterior.
plot_error_corr_heatmap(ppc_result_posterior_moderate_discrimination)

summarise_error_corr(ppc_result_posterior_moderate_discrimination)

{'mean_off_diag': np.float64(0.0001145121720577443),
 'sd_off_diag': np.float64(0.1429544471193103),
 'p95_abs_corr': np.float64(0.27695585470349854)}
# Prior-vs-posterior bands (moderate prior), then run the same PPC
# comparison under the strong-discrimination prior.
plot_differences(summaries_moderate_discrimination)

summaries_strong_discrimination, ppc_result_posterior_strong_discrimination = compare_prior_posterior(traces_discrimination['strong_discrimination'])

summaries_strong_discrimination
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3065: RuntimeWarning: invalid value encountered in divide
  c /= stddev[:, None]
/Users/nathanielforde/mambaforge/envs/applied-bayesian-regression-modeling-env/lib/python3.13/site-packages/numpy/lib/_function_base_impl.py:3066: RuntimeWarning: invalid value encountered in divide
  c /= stddev[None, :]

=== Majority accuracy (full jury) ===
[0.159 0.86  1.   ]

=== Unanimity rate (full jury) ===
[0.   0.   0.02]

=== Majority accuracy by jury size ===
Jury size 3:
[0.34 0.68 0.9 ]
Jury size 5:
[0.32 0.72 0.94]
Jury size 7:
[0.26 0.74 0.96]
Jury size 10:
[0.26 0.82 0.98]
Jury size 15:
[0.159 0.86  1.   ]

=== Mean juror agreement rates ===
[0.60028 0.61212 0.59256 0.60508 0.60072 0.6142  0.63324 0.62096 0.60088
 0.58748 0.62488 0.60488 0.60544 0.59912 0.60396]

=== Majority accuracy (full jury) ===
[0.9  0.96 1.  ]

=== Unanimity rate (full jury) ===
[0.   0.   0.02]

=== Majority accuracy by jury size ===
Jury size 3:
[0.68 0.8  0.88]
Jury size 5:
[0.74 0.84 0.92]
Jury size 7:
[0.78 0.88 0.96]
Jury size 10:
[0.84 0.92 0.98]
Jury size 15:
[0.9  0.96 1.  ]

=== Mean juror agreement rates ===
[0.693955 0.701255 0.706355 0.70056  0.69895  0.707245 0.692345 0.71037
 0.70129  0.710385 0.711215 0.7117   0.7015   0.708655 0.702275]
majority_accuracy_3 majority_accuracy_5 majority_accuracy_7 majority_accuracy_10 majority_accuracy_15
prior percentile_5 0.34 0.32 0.26 0.26 0.159
percentile_50 0.68 0.72 0.74 0.82 0.860
percentile_95 0.90 0.94 0.96 0.98 1.000
posterior percentile_5 0.68 0.74 0.78 0.84 0.900
percentile_50 0.80 0.84 0.88 0.92 0.960
percentile_95 0.88 0.92 0.96 0.98 1.000
# Error-correlation diagnostics for the strong-discrimination posterior.
plot_error_corr_heatmap(ppc_result_posterior_strong_discrimination)

summarise_error_corr(ppc_result_posterior_strong_discrimination)

{'mean_off_diag': np.float64(7.785091800380223e-05),
 'sd_off_diag': np.float64(0.14292377897521807),
 'p95_abs_corr': np.float64(0.2775980150199401)}
# Prior-vs-posterior bands for the strong-discrimination prior.
plot_differences(summaries_strong_discrimination)

Case Difficulty

def ppc_with_case_difficulty(
    idata,
    n_cases,
    sigma_case,
    rng=None,
    true_states=None
):
    """
    PPC generator with shared case-level difficulty shocks.

    Each case gets a common logit-scale shock delta ~ Normal(0, sigma_case)
    that shifts every juror's vote probability on that case, inducing
    correlated errors across jurors.

    Parameters
    ----------
    idata : InferenceData
        Must have a posterior 'logit_p_juror' variable.
    n_cases : int
        Number of cases per posterior draw.
    sigma_case : float
        Standard deviation of the shared case shock.
    rng : np.random.Generator, optional
        Bug fix: the default used to be a Generator constructed once at
        function-definition time, so successive calls shared and advanced
        a single stream; a fresh seeded Generator is now created per call,
        making each call independently reproducible.
    true_states : array of {0, 1}, shape (n_cases,), optional
        Fixed truths; drawn as fair coin flips when omitted.

    Returns
    -------
    dict with keys 'votes' (n_samples, n_cases, n_jurors),
    'true_state' (n_samples, n_cases) and 'delta_case' (n_samples, n_cases).
    """
    if rng is None:
        rng = np.random.default_rng(123)

    # Flatten (chain, draw) into one sample axis: (n_jurors, n_samples).
    logit_p = (idata.posterior['logit_p_juror']
               .stack(sample=("chain", "draw")).values)

    n_jurors, n_samples = logit_p.shape
    truth = np.zeros((n_samples, n_cases))
    if true_states is None:
        true_states = rng.binomial(1, 0.5, size=n_cases)
    truth[:] = true_states  # same truths replicated across posterior draws
    delta_case = rng.normal(0.0, sigma_case, size=(n_samples, n_cases))

    votes = np.zeros((n_samples, n_cases, n_jurors), dtype=int)

    for s in range(n_samples):
        for i in range(n_cases):
            # Flip each juror's logit when the truth is 0, add the shared
            # case shock, and draw the whole panel's votes at once.
            sign = 1 if truth[s, i] == 1 else -1
            logits = sign * logit_p[:, s] + delta_case[s, i]
            p = 1 / (1 + np.exp(-logits))
            votes[s, i] = rng.binomial(1, p)

    return {
        "votes": votes,
        "true_state": truth,
        "delta_case": delta_case,
    }


# Sweep the shared case-difficulty scale and report how induced error
# correlation erodes majority accuracy.
for sigma in [0.0, 0.2, 0.5, 1]:
    ppc = ppc_with_case_difficulty(
        traces_discrimination['weak_discrimination'],
        n_cases=50,
        sigma_case=sigma,
        true_states=true_states
    )

    n_samples = ppc['votes'].shape[0]

    # Per-sample juror error-correlation matrices, stacked.
    corrs = np.stack([
        error_correlation(ppc["votes"][s], ppc['true_state'][s])
        for s in range(n_samples)
    ])
    off_diag = corrs[:, ~np.eye(corrs.shape[1], dtype=bool)]

    # Per-sample full-panel majority accuracy.
    acc = [
        majority_accuracy(ppc["votes"][s], ppc["true_state"][s])
        for s in range(n_samples)
    ]

    print(f"\nσ_case = {sigma}")
    print("Mean majority accuracy:", np.mean(acc))
    print("mean_corr", np.nanmean(off_diag))
    print("median_corr", np.nanmedian(off_diag))
    print("p95_abs_corr", np.nanpercentile(np.abs(off_diag), 95))
    print("nan_fraction", np.isnan(off_diag).mean())

σ_case = 0.0
Mean majority accuracy: 0.9528000000000001
mean_corr -0.0004735283739258481
median_corr -0.006593661395050807
p95_abs_corr 0.2762832423274467
nan_fraction 0.0

σ_case = 0.2
Mean majority accuracy: 0.940575
mean_corr 0.008204727656772869
median_corr 0.005636260652626381
p95_abs_corr 0.2777155134201167
nan_fraction 0.0

σ_case = 0.5
Mean majority accuracy: 0.8855
mean_corr 0.04870339440464984
median_corr 0.04761904761904767
p95_abs_corr 0.30012252399939066
nan_fraction 0.0

σ_case = 1
Mean majority accuracy: 0.777865
mean_corr 0.16094673060834333
median_corr 0.16390251702679648
p95_abs_corr 0.40247590361231644
nan_fraction 0.0

Citation

BibTeX citation:
@online{forde,
  author = {Forde, Nathaniel},
  title = {The {Condorcet} {Jury} {Theorem} and {Democratic}
    {Rationality}},
  langid = {en}
}
For attribution, please cite this work as:
Forde, Nathaniel. n.d. “The Condorcet Jury Theorem and Democratic Rationality.”